## Jens Hainmueller; Jonathan Mummolo; Yiqing Xu
## This Version: 2015.12.05

######   Interpreting Interaction Models  #######

## 1. inter.rawplot: first look at the data: D, X, Y
## 2. inter.gam: GAM plots
## 3. inter.binning: estimates by X bins
## 4. inter.kernel: kernel smooth plot
## 5. Xstat: statistics of the moderator
## 6. replicate
## 7. plot.all: produce all plots

## Appendix: vcovCluster: clustered standard error

#################################################

inter.rawplot<-function(Y,D,X,data,nbins=3,cuts=NULL, span=NULL,
                        Ylabel=NULL,Dlabel=NULL,Xlabel=NULL, weights=NULL,
                        pos=NULL){ 
    
    ## Y: outcome
    ## D: "treatment" indicator
    ## X: covariate to be interacted with D
    ## nbins: ## of bins
    ## cuts: specified cut-points
    ## span: bandwidth for loess
    
    ## load packages
    library(ggplot2)
    
    ## variable labels
    if (is.null(Ylabel)==TRUE) Ylabel=Y
    if (is.null(Xlabel)==TRUE) Xlabel=X
    if (is.null(Dlabel)==TRUE) Dlabel=D
    
    ## ploting
    if (length(unique(data[,D]))==2) { ## binary case
        
        ## plotting
        treat.lab<-c(paste(Dlabel,"= 0"),paste(Dlabel,"= 1"))
        data.aug<-data
        data.aug$treat<-factor(data[,D], labels=treat.lab)

        if (is.null(pos)==TRUE) {
            box.pos.co <- min(data[,Y])
            box.pos.tr <- max(data[,Y])
        } else {
            box.pos.co <- pos[1]
            box.pos.tr <- pos[2]
        }

        tr <- which(data[,D]==1)
        co <- which(data[,D]==0)
        qt90.tr <- quantile(data[tr,X],c(0.05,0.95))
        qt90.co <- quantile(data[co,X],c(0.05,0.95))
        qt50.tr <- quantile(data[tr,X],c(0.25,0.75))
        qt50.co <- quantile(data[co,X],c(0.25,0.75))
        med.tr <- median(data[tr,X])
        med.co <- median(data[co,X])
        
        data.aug$qt90 <- NA
        data.aug$qt90[which(data.aug[,D]==1 &
                            data.aug[,X]>=qt90.tr[1] &
                            data.aug[,X]<=qt90.tr[2])]<- box.pos.tr
        data.aug$qt90[which(data.aug[,D]==0 &
                            data.aug[,X]>=qt90.co[1] &
                            data.aug[,X]<=qt90.co[2])]<- box.pos.co
        data.aug$qt50 <- NA
        data.aug$qt50[which(data.aug[,D]==1 &
                            data.aug[,X]>=qt50.tr[1] &
                            data.aug[,X]<=qt50.tr[2])]<- box.pos.tr
        data.aug$qt50[which(data.aug[,D]==0 &
                            data.aug[,X]>=qt50.co[1] &
                            data.aug[,X]<=qt50.co[2])]<- box.pos.co
        data.aug$med <- NA
        data.aug$med[tr[which.min(abs(data.aug[tr,X]-med.tr))]]<- box.pos.tr
        data.aug$med[co[which.min(abs(data.aug[co,X]-med.co))]]<- box.pos.co
        
        
        ## plotting
        if (is.null(weights)==TRUE) {
            p1 <- ggplot(data.aug, aes_string(X, Y))
        } else {
            p1 <- ggplot(data.aug, aes_string(X, Y, weight=weights))
                
        } 
        p1 <- p1 + geom_point() +
            geom_smooth(method = "lm", se = F, fullrange = T,
                        colour = "steelblue", size = 1)
        
        if (is.null(span)==TRUE) {
            p1 <- p1+ geom_smooth(method = "loess", formula = y ~ x,
                            se = F, colour="red") 
        } else {
            p1 <- p1 + geom_smooth(method = "loess", formula = y ~ x,
                            se = F, colour="red",span=span)
        }
        p1 <- p1 + xlab(Xlabel) + ylab(Ylabel)

       
        p1 <- p1 + geom_line(aes_string(X,"qt90"),
                             size=1,colour="grey50")
        p1 <- p1 + geom_line(aes_string(X,"qt50"),
                             size=3,colour="grey50")
        p1 <- p1 + geom_point(aes_string(X,"med"),size=3,
                              shape=21,fill="white",colour="red")

        p1 <- p1 + theme(axis.title = element_text(size=15))

        p1 <- p1 + facet_wrap(~treat, ncol=1) 
        #p1 <- p1 + facet_grid(treat ~.)                
        suppressWarnings(print(p1))
        
        
    } else { # continuous case
        
        ## grouping by X
        if (is.null(cuts)==TRUE) {
            cutoff<-quantile(data[,X],probs=seq(0,1,1/nbins))
        } else {
            cutoff<-cuts
        }
        while (length(unique(cutoff))!=nbins+1) {
            nbins<-nbins-1; cutoff<-quantile(data[,X],probs=seq(0,1,1/nbins));
        } 
        groupID<-cut(data[,X],breaks=cutoff,label=F)
        groupID[which(data[,X]==min(data[,X]))]<-1
        
        ## X labels
        if (nbins==2) {
            gp.lab<-c(paste(Xlabel,": low",sep=""),paste(Xlabel,": high",sep="")) 
        } else if (nbins==3) {
            gp.lab<-c(paste(Xlabel,": low",sep=""),paste(Xlabel,": medium",sep=""),
                      paste(Xlabel,": high",sep="")) 
        } else {
            gp.lab<-c();for (i in 1:nbins) gp.lab<-c(gp.lab,paste(lab,": Q",i,sep=""))
        } 
        groupID <- factor(groupID, labels=gp.lab)
        data.aug <- data
        data.aug$groupID<-groupID
        
        ## plotting
        if (is.null(weights)==TRUE) {
            p1 <- ggplot(data.aug, aes_string(D, Y))
        } else {
            p1 <- ggplot(data.aug, aes_string(D, Y,weight=weights))
        }
        p1 <- p1 + geom_point() + 
            geom_smooth(method = "lm", se = F, fullrange = T,
                        colour = "steelblue", size = 1)
        if (is.null(span)==TRUE) {
            p1 <- p1 +
                geom_smooth(method = "loess", formula = y ~ x,
                            se = F, colour="red") 
        } else {
            p1 <- p1 +
                geom_smooth(method = "loess", formula = y ~ x,
                            se = F, colour="red",span=span) 
        }
        
        p1 <- p1 + xlab(Dlabel) + ylab(Ylabel) + facet_grid(.~groupID)
        print(p1)
    }
    
}


#############################################################
## GAM


inter.gam<-function(Y,D,X,Z=NULL,FE=NULL,data,SE=0,k=10,
                    weights=NULL,angle=c(30,100,-30,-120)){
    
    library(mgcv)
    
    if (is.null(FE)==FALSE) {
        if (is.null(Z)==TRUE) {Z<-c()}
        for (i in 1:length(FE)) {
            Z<-c(Z,paste("as.factor(",FE[i],")",sep=""))
        } 
    }
    
    if (is.null(Z)==TRUE) { # no controls
        formula<-as.formula(paste(Y,"~","s(",D,",",X,",k=",k,")"))
    } else {
        formula<-as.formula(paste(Y,"~","s(",D,",",X,",k=",k,")+",
                                  paste(Z,collapse="+")))
    }
    if (is.null(weights)==TRUE) {
        model<-gam(formula, data=data)
    } else {
        model<-gam(formula, data=data,weights=data[,weights])
    }
    par(mfrow=c(2,2),mar=c(2,2,0,0))
    if (SE==0) {
        for (i in angle) {
            vis.gam(model, view=c(D,X), type="response",cex.lab=1.5,zlab=Y,
                    ticktype="detailed",plot.type="persp",n.grid=40,too.far=0.5,theta=i,phi=20)
        }
    } else {
        for (i in angle) {
            vis.gam(model, view=c(D, X), type="response",cex.lab=1.5,zlab=Y,
                    ticktype="detailed",plot.type="persp",se=2,n.grid=40,too.far=0.5,theta=30,phi=20)
        }
    }
}


####################################################

inter.binning<-function(data,
                        Y, # outcome
                        D, # treatment indicator
                        X, # moderator
                        Z = NULL, # covariates
                        FE = NULL, # fixed effects
                        weights = NULL, # weigthing variable
                        nbins = 3,  # No. of X bins
                        cuts = NULL,
                        var = "standard", # variance type
                        ##  "standard" (default); "robust"; "cluster", "pcse"
                        cl = NULL, # variable to be clustered on
                        time = NULL, # time variable for pcse
                        pairwise = TRUE, # pcse option
                        figure = TRUE,
                        main = NULL,
                        Xlabel = NULL,
                        Dlabel = NULL,
                        Ylabel = NULL,
                        ylim = NULL,
                        interval = NULL,
                        Xdistr = "density" # c("density","histogram")
                        ){
    
    ##library(lmtest)
    library(ggplot2)
    N<-dim(data)[1]
    data[,D]<-as.numeric(data[,D])
    
    ## parsing fixed effects
    if (is.null(FE)==FALSE) {
        if (is.null(Z)==TRUE) {Z<-c()}
        for (i in 1:length(FE)) {
            Z<-c(Z,paste("as.factor(",FE[i],")",sep=""))
        } 
    } 
    
    ## a naive fit
    if (is.null(Z)==FALSE) {
        mod.f<-as.formula(paste(Y,"~",D,"+",X,"+",D,"*",X,"+",paste(Z,collapse="+"),sep=""))
    } else {
        mod.f<-as.formula(paste(Y,"~",D,"+",X,"+",D,"*",X,sep=""))
    }
    if (is.null(weights)==TRUE) {
        mod.naive<-lm(mod.f,data=data)
    } else {
        mod.naive<-lm(mod.f,data=data,weights=data[,weights])
    }    
    ## coefficients
    coefs<-summary(mod.naive)$coefficients[,1]
    coef.D<-coefs[D]
    coef.X<-coefs[X]
    coef.DX<-coefs[paste(D,X,sep=":")] #interaction
    
    ## # variance
    if (is.null(var)==TRUE) {
        var<-"standard"
    }
    if (var=="standard") {
        v<-vcov(mod.naive)
    } else if (var=="robust") {
        require(sandwich)
        v<-vcov(mod.naive,type="HC1") # White with small sample correction
    } else if (var=="cluster") {
        if (is.null(cl)==FALSE) {
            v<-vcovCluster(mod.naive,cluster = data[,cl])
        } else {  # variable to be clustered not supply
            stop("Warning: clustering variable not found, set cl=varname")
        }
    } else if (var=="pcse") {
        library(pcse)
        if (is.null(cl)==FALSE & is.null(time)==FALSE) {
            v<-pcse(mod.naive,groupN=data[,cl],groupT=data[,time],pairwise=pairwise)$vcov
        } else {  # variable to be clustered not supply
            stop("Warning: please supply unit and time indicators, set cl=varname1, time=varname2")
        }
    }
    
    if (var=="pcse") {
        var.D<-v[D,D]
        var.DX<-v[paste(D,X,sep="."),paste(D,X,sep=".")]
        cov.DX<-v[D,paste(D,X,sep=".")]
    } else {
        var.D<-v[D,D]
        var.DX<-v[paste(D,X,sep=":"),paste(D,X,sep=":")]
        cov.DX<-v[D,paste(D,X,sep=":")]
    }
    
    ###  make a vector of the marginal effect of D on Y as X changes 
    X.lvls<-as.numeric(quantile(data[,X], probs=seq(0,1,0.01)))
    marg<-coef.D + coef.DX*X.lvls
    marg
    
    ## the variance is var(B1_D) + X^2*var(B_3) + 2*inst*cov(D, X)
    se<-sqrt(var.D +  X.lvls^2*var.DX + 2*X.lvls*cov.DX)
    df<-mod.naive$df.residual
    crit<-abs(qt(.025, df=df)) # critical values
    
    ##make 95% confidence bands. 
    lb<-marg-crit*se
    ub<-marg+crit*se
    
##################################################
    
    ## grouping by X
    if (is.null(cuts)==TRUE) {
        cuts.X<-quantile(data[,X],probs=seq(0,1,1/nbins))
    } else {
        cuts.X<-cuts
        nbins<-length(cuts)-1
    }
    while (length(unique(cuts.X))!=nbins+1) {
        nbins<-nbins-1; cuts.X<-quantile(data[,X],probs=seq(0,1,1/nbins));
    }      
    groupX<-cut(data[,X],breaks=cuts.X,label=F)
    groupX[which(data[,X]==min(data[,X]))]<-1
    
############## Discre#tize X #################
    
    ## mid points
    x0<-rep(NA,nbins)
    for (i in 1:nbins) x0[i]<-median(data[which(groupX==i),X], na.rm=TRUE)
    
    ## create dummies for bins and interactions
    ## G -- a matrix of group dummies
    ## DG -- a matrix of interactions 
    
    G<-DG<-GX<-DGX<-matrix(0,N,nbins)
    for (i in 1:nbins) {
        G[which(groupX==i),i]<-1
        DG[,i]<-data[,D]*G[,i]
        GX[,i]<-G[,i]*(data[,X]-x0[i])
        DGX[,i]<-DG[,i]*(data[,X]-x0[i])
    }
    
    ## formula and esitmation
    Gs<-GXs<-DGs<-DGXs<-c()
    for (i in 1:nbins)  {
        Gs<-c(Gs,paste("G[,",i,"]",sep=""))
        GXs<-c(GXs,paste("GX[,",i,"]",sep=""))
        DGs<-c(DGs,paste("DG[,",i,"]",sep=""))
        DGXs<-c(DGXs,paste("DGX[,",i,"]",sep=""))
    }
    Xf<-paste(Y,"~ -1+",paste(DGs,collapse="+"),"+",paste(DGXs,collapse="+"),
              "+",paste(Gs,collapse="+"),"+",paste(GXs,collapse="+"),sep="")
    if (is.null(Z)==FALSE) {
        Xf<-paste(Xf,"+",paste(Z,collapse="+"),sep="")
    }
    mod.Xf<-as.formula(Xf)
    if (is.null(weights)==TRUE) {
        mod.X<-lm(mod.Xf,data=data) 
    } else {
        mod.X<-lm(mod.Xf,data=data,weights=data[,weights])
    }    
    
    ## coefficients and CIs
    Xcoefs<-mod.X$coefficients[1:nbins]
    if (var=="standard") {
        X.v<-vcov(mod.X)
    } else if (var=="robust") {
        X.v<-vcov(mod.X,type="HC1") ## White with small sample correction
    } else if (var=="cluster") {
        X.v<-vcovCluster(mod.X,cluster=data[,cl])
    } else if (var=="pcse") {
        if (is.null(Z)==FALSE) {
            exclude<-names(which(is.na(mod.X$coefficients)==TRUE))  ## drop colinear variables
            Z.ex<-setdiff(Z,exclude)
            Xf<-paste(Y,"~ -1+",paste(DGs,collapse="+"),"+",paste(DGXs,collapse="+"),
                      "+",paste(Gs,collapse="+"),"+",paste(GXs,collapse="+"),"+",paste(Z.ex,collapse="+"),sep="")
            mod.X<-lm(as.formula(Xf),data=data)
        }
        X.v<-pcse(mod.X,groupN=data[,cl],groupT=data[,time],pairwise=pairwise)$vcov
    }
    X.v<-X.v[1:nbins,1:nbins] 
    X.se<-sqrt(diag(X.v)) 
    df.X<-mod.X$df.residual
    crit.X<-abs(qt(.025, df=df.X))
    lb.X<-Xcoefs-crit.X*X.se
    ub.X<-Xcoefs+crit.X*X.se
    print(cbind(x0, coef=Xcoefs, se = X.se))

   
################### plotting ###############################
    
    ## margin and label adjustment
    
    if (figure==TRUE) {
        
        if(is.null(Xlabel)==FALSE){
            x.label<-c(paste("Moderator: ", Xlabel, sep=""))
            y.label<-c(paste("Marginal effect of ",Dlabel," on ",Ylabel,sep=""))
        } else {
            x.label<-c(paste("Moderator: ", X, sep=""))
            y.label<-c(paste("Marginal effect of ",D," on ",Y,sep=""))
        }
        out<-data.frame(X.lvls,marg,lb,ub)
        out.bin<-data.frame(x0,Xcoefs,lb.X,ub.X)
        out.bin2<-out.bin[which(is.na(Xcoefs)==FALSE),] ## non missing part
        out.bin3<-out.bin[which(is.na(Xcoefs)==TRUE),]  ## missing part
        errorbar.width<-(max(X.lvls)-min(X.lvls))/20
        
        p1 <- ggplot()
        yrange<-na.omit(c(marg,lb,ub,Xcoefs,lb.X,ub.X))
        if (is.null(ylim)==FALSE) {yrange<-c(ylim[2],ylim[1]+(ylim[2]-ylim[1])*1/6)}
        maxdiff<-(max(yrange)-min(yrange))
        pos<-max(yrange)-maxdiff/20

        ## mark zero
        p1 <- p1 + geom_hline(yintercept=0,colour="white",size=2)

        ## mark the original interval
        if (is.null(interval)==FALSE) {
            p1<- p1 + geom_vline(xintercept=interval,colour="steelblue",
                                 linetype=2,size=1.5)
        }

        ## plotting moderator distribution
        if (is.null(Xdistr) == TRUE) {
            Xdistr <- "density"
        } else if (Xdistr != "density" & Xdistr != "histogram" & Xdistr != "hist") {
            Xdistr <- "density"
        }  
        if (Xdistr == "density") { # density plot

            if (length(unique(data[,D]))==2) { ## binary D

                ## get densities
                if (is.null(weights)==TRUE) {
                    de.co <- density(data[data[,D]==0,X])
                    de.tr <- density(data[data[,D]==1,X])
                } else {
                    suppressWarnings(library(plotrix))
                    suppressWarnings(de.co<-density(data[data[,D]==0,X],
                                                    weights=data[data[,D]==0,weights]))
                    suppressWarnings(de.tr<-density(data[data[,D]==1,X],
                                                    weights=data[data[,D]==1,weights]))
                }
 
                ## put in data frames
                deX.ymin <- min(yrange)-maxdiff/5
                deX.co <- data.frame(
                    x = de.co$x,
                    y = de.co$y/max(de.co$y) * maxdiff/5 + min(yrange) - maxdiff/5
                )
                deX.tr <- data.frame(
                    x = de.tr$x,
                    y = de.tr$y/max(de.tr$y) * maxdiff/5 + min(yrange) - maxdiff/5
                )

                ## color
                feed.col<-col2rgb("gray50")
                col.co<-rgb(feed.col[1]/1000, feed.col[2]/1000,feed.col[3]/1000)
                col.tr<-rgb(red=1, blue=0, green=0)

                ## plotting
                p1 <- p1 + geom_ribbon(data = deX.co,
                                       aes(x = x, ymax = y, ymin = deX.ymin),
                                       fill = col.co, alpha = 0.2) +
                    geom_ribbon(data = deX.tr,
                                aes(x = x, ymax = y, ymin = deX.ymin),
                                fill = col.tr, alpha = 0.2)
               
               
                
            } else { ## continuous D
                
                ## get densities
                if (is.null(weights)==TRUE) {
                    de <- density(data[,X])
                } else {
                    suppressWarnings(library(plotrix))
                    suppressWarnings(de <- density(data[,X],weights=data[,weights]))
                }

                ## put in data frames
                deX.ymin <- min(yrange)-maxdiff/5
                deX <- data.frame(
                    x = de$x,
                    y = de$y/max(de$y) * maxdiff/5 + min(yrange) - maxdiff/5
                ) 

                ## color
                feed.col<-col2rgb("gray50")
                col<-rgb(feed.col[1]/1000, feed.col[2]/1000,feed.col[3]/1000)
                
                ## plotting
                p1 <- p1 + geom_ribbon(data = deX,
                                       aes(x = x, ymax = y, ymin = deX.ymin),
                                       fill = col, alpha = 0.2)  
                
            }
            
        } else  { # histogram plot

            if (length(unique(data[,D]))==2) { ## binary D

                if (is.null(weights)==TRUE) {
                    hist.out<-hist(data[,X],breaks=80,plot=FALSE)
                } else {
                    suppressWarnings(library(plotrix))
                    suppressWarnings(hist.out<-hist(data[,X],data[,weights],
                                                    breaks=80,plot=FALSE))
                } 
                n.hist<-length(hist.out$mids)
                dist<-hist.out$mids[2]-hist.out$mids[1]
                hist.max<-max(hist.out$counts)
                ## count the number of treated
                count1<-rep(0,n.hist)
                treat<-which(data[,D]==max(data[,D]))
                for (i in 1:n.hist) {
                    count1[i]<-sum(data[treat,X]>=hist.out$breaks[i] &
                                   data[treat,X]<hist.out$breaks[(i+1)])
                }
                count1[n.hist]<-sum(data[treat,X]>=hist.out$breaks[n.hist] &
                                    data[treat,X]<=hist.out$breaks[n.hist+1])
                ## put in a data frame
                histX<-data.frame(ymin=rep(min(yrange)-maxdiff/5,n.hist),
                                  ymax=hist.out$counts/hist.max*maxdiff/5+min(yrange)-maxdiff/5,
                                  xmin=hist.out$mids-dist/2,
                                  xmax=hist.out$mids+dist/2,
                                  count1=count1/hist.max*maxdiff/5+min(yrange)-maxdiff/5)
                p1 <- p1 + geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax),
                                     colour="gray50",alpha=0,size=0.5) + # control
                    geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=count1),
                              fill="red",colour="grey50",alpha=0.3,size=0.5) # treated
                
            }  else { ## continuous D
                hist.out<-hist(data[,X],breaks=80,plot=FALSE)
                n.hist<-length(hist.out$mids)
                dist<-hist.out$mids[2]-hist.out$mids[1]
                hist.max<-max(hist.out$counts)
                
                histX<-data.frame(ymin=rep(min(yrange)-maxdiff/5,n.hist),
                                  ymax=hist.out$counts/hist.max*maxdiff/5+min(yrange)-maxdiff/5,
                                  xmin=hist.out$mids-dist/2,
                                  xmax=hist.out$mids+dist/2)
                p1 <- p1 + geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax),
                                     colour="gray50",alpha=0,size=0.5)
            }
        }
        
    
        ## title
        if (is.null(main)==FALSE) {
            p1<-p1 + ggtitle(main) +
                theme(plot.title = element_text(size=35,
                                                lineheight=.8, face="bold"))
        } 

      
        ## labels: L, M, H
        if (nbins==3) {
            p1<-p1 + annotate(geom="text", x=out.bin[1,1], y=pos,
                              label="L",colour="gray50",size=10) +
                annotate(geom="text", x=out.bin[2,1], y=pos,
                         label="M",colour="gray50",size=10) +
                annotate(geom="text", x=out.bin[3,1], y=pos,
                         label="H",colour="gray50",size=10)
        } else if (nbins==4) {
            p1<-p1 + annotate(geom="text", x=out.bin[1,1], y=pos,
                              label="L",colour="gray50",size=10) +
                annotate(geom="text", x=out.bin[2,1], y=pos,
                         label="M1",colour="gray50",size=10) +
                annotate(geom="text", x=out.bin[3,1], y=pos,
                         label="M2",colour="gray50",size=10) +
                annotate(geom="text", x=out.bin[4,1], y=pos,
                         label="H",colour="gray50",size=10)
        }
        
        ## linear plot 
        p1<-p1 + geom_line(data=out,aes(X.lvls,marg))+
            geom_ribbon(data=out, aes(x=X.lvls,ymin=lb,ymax=ub),alpha=0.2)+
                xlab(x.label) + ylab(y.label)
        ## bin estimates
        p1<-p1+ geom_errorbar(data=out.bin2, aes(x=x0, ymin=lb.X, ymax=ub.X),colour="red",
                              width= errorbar.width)+
                                  geom_point(data=out.bin2,aes(x0,Xcoefs),size=4,shape=21,fill="white",colour="red") +
                                      theme(axis.title = element_text(size=15))
        ## in case there's non-overlap
        p1<-p1+annotate(geom="text", x=out.bin3[,1], y=rep(0,dim(out.bin3)[1]),
                        label="NaN",colour="red") 

        if (is.null(ylim)==FALSE) {p1<-p1+coord_cartesian(
                                              ylim = c(ylim[1]-(ylim[2]-ylim[1])*0.25/6,
                                                       ylim[2]+(ylim[2]-ylim[1])*0.4/6))}
        
        plot(p1)

        
    }  # end of plotting

    ## ################# testing  ###############################

    ## binary treatment
    btreat<-length(unique(data[,D]))==2
    
    ## variance of treatment in each group 
    varD<-c()
    for (i in 1:nbins) {
        varD<-c(varD,var(data[groupX==i,D]/mean(data[groupX==i,D])))
    }
    
    ## if the signs are correct
    correctOrder<-ifelse(as.numeric((Xcoefs[1]-Xcoefs[2])*(Xcoefs[2]-Xcoefs[3]))>0,TRUE,FALSE) 
    
    ## p values
    pvalue<-function(i,j){
        stat<-(Xcoefs[i]-Xcoefs[j])/sqrt(X.v[i,i]+X.v[j,j]-2*X.v[i,j])
        p<-(1-pt(abs(stat),df.X))*2
        return(p)
    }
    if (nbins==3) {
        p.twosided<-round(c(pvalue(1,2),pvalue(2,3),pvalue(1,3)),digit=4)
        names(p.twosided)<-c("p.1v2","p.2v3","p.1v3")
        names(Xcoefs)<-c("X_low","X_med","X_high")
    } else if (nbins==2) {
        p.twosided<-round(pvalue(1,2),digit=4)
        names(p.twosided)<-c("p.LvH")
        names(Xcoefs)<-c("X_low","X_high")
    } else if (nbins==4) {
        names(Xcoefs)<-c("X_low","X_med1","X_med2","X_high")
    }
    
    ## storage
    out<-list(binary.treatment=btreat,
              treat.variation.byGroup=round(varD,4))
    if (nbins==3) {
        out<-c(out,list(correctOrder=correctOrder))
    }
    out<-c(out,list(coefs=round(Xcoefs,4)))
    if (nbins%in%c(2,3)) {
        out<-c(out,list(p.twosided=p.twosided)) 
    }
    ## if (nbins==3) {
    ##     out<-c(out,list(betas = round(betas,4), p.ftest = round(p.ftest,4)))
    ## }
    if (figure==TRUE) {out<-c(out,list(graph=p1))} 
    return(out)
}

###########################

inter.kernel<-function(data,Y,D,X,Z=NULL,FE=NULL,weights=NULL,
                       nboot=500,grid=30,h=NULL,cl=NULL,
                       Dlabel=NULL,Xlabel=NULL,Ylabel=NULL,cores=4,parallel=TRUE,
                       ylim=NULL,file=NULL,
                       Xdistr = "density"){
    
    ## nboot: number of bootstraps
    ## grid: points at which kernel regressions are estimated
    ## h: bandwidth
    ## cl: clustering variable
    
    library(ggplot2)
    library(np)
    
    ## parpare
    n<-dim(data)[1]
    newx<-seq(min(data[,X]),max(data[,X]),length=grid) ## grid: at which regressions are run
    newy<-newd<-rep(0,grid)
    newz<-matrix(0,grid,length(Z))
    
    ## partial out fixed effects
    if (is.null(FE)==FALSE) {
        fe<-c()
        for (i in 1:length(FE)) {
            fe<-c(fe,paste("as.factor(",FE[i],")",sep=""))
        }
        Y.tilde<-lm(paste(Y,"~",paste(fe,collapse="+")),data=data)$residuals
        D.tilde<-lm(paste(D,"~",paste(fe,collapse="+")),data=data)$residuals        
    } else {
        Y.tilde<-data[,Y]
        D.tilde<-data[,D]
    }
    DX.tilde<-D.tilde*data[,X]
    
    ## bandwidth selection
    if (is.null(h)==TRUE) {
        bw<-npscoefbw(ydat=Y.tilde, xdat=cbind(D.tilde,DX.tilde,data[,c(X,Z)]),zdat=data[,X])
        h.max<-(max(data[,X])-min(data[,X]))/5
        if (h.max<bw$bw) {
            bw<-npscoefbw(ydat=Y.tilde, xdat=cbind(D.tilde,DX.tilde,data[,c(X,Z)]),zdat=data[,X],bws=h.max,
                          bandwidth.compute=FALSE)
        } 
    } else { ## set bandwidth mannually
        bw<-npscoefbw(ydat=Y.tilde, xdat=cbind(D.tilde,DX.tilde,data[,c(X,Z)]),zdat=data[,X],
                      bws=h,bandwidth.compute=FALSE)
        
    }
    evaluate.data<-cbind(newd,newd,newx,newz)
    mod.new<-npscoef(bw,betas=TRUE,exdat=evaluate.data,ezdat=newx)
    coefs<-coef(mod.new)
    coef<-coefs[,2]+coefs[,3]*newx
    cat("Bandwidth =", round(mod.new$bw,3),"\n")
    
    ## bootstrap standard errors and CI
    if (is.null(cl)==FALSE) { ## find clusters
        clusters<-unique(data[,cl])
        id.list<-split(1:n,data[,cl])
    }
    
    
    ## the function
    getCoef<-function(i) {
        if (is.null(cl)==TRUE) {
            smp<-sample(1:n,n,replace=TRUE)
        } else { ## block bootstrap
            cluster.boot<-sample(clusters,length(clusters),replace=TRUE)
            smp<-unlist(id.list[match(cluster.boot,clusters)])
        }   
        s<-data[smp,]
        if (is.null(FE)==FALSE) {
            s.Y<-lm(paste(Y,"~",paste(fe,collapse="+")),data=s)$residuals
            s.D<-lm(paste(D,"~",paste(fe,collapse="+")),data=s)$residuals            
        } else {
            s.Y<-s[,Y]
            s.D<-s[,D]
        }
        s.DX<-s.D*s[,X]
        mod.boot<-npscoef(bw,tydat=s.Y, txdat=cbind(s.D,s.DX,s[,c(X,Z)]), tzdat=s[,X],
                          exdat=evaluate.data, ezdat=newx,betas=TRUE)
        coefs<-coef(mod.boot)
        out<-coefs[,2]+coefs[,3]*newx
        return(out)
    }
    
    ## start loop
    cat("Bootstrapping:\n")
    if (parallel==TRUE) {
        library(foreach)
        library(doParallel)
        pcl<-makeCluster(cores)  
        registerDoParallel(pcl)
        cat("parallel computing with", cores,"cores...")
        suppressWarnings(
            coefs<-foreach (i=1:nboot, .combine=cbind, .packages="np",
                            .export="getCoef",.inorder=FALSE) %dopar% {getCoef(i)}
        )
        stopCluster(pcl)
        cat("\n") 
    } else {
        coefs<-matrix(NA,grid,nboot)
        for (i in 1:nboot) {
            coefs[,i]<-getCoef(i)
            if (i%%50==0) cat(i) else cat(".")
        }
        cat("\n")        
    }
    
    ## summary
    CI<-t(apply(coefs,1,quantile,c(0.025,0.975)))
    est<-data.frame(cbind("X"=newx,"Coef"=coef,"SE"=apply(coefs,1,sd),"CI_lower"=CI[,1], "CI_upper"=CI[,2]))
    
    
    ## plotting
    if(is.null(Xlabel)==FALSE){
        x.label<-c(paste("Moderator: ", Xlabel, sep=""))
        y.label<-c(paste("Marginal effect of ",Dlabel," on ",Ylabel,sep=""))
    } else {
        x.label<-c(paste("Moderator: ", X, sep=""))
        y.label<-c(paste("Marginal effect of ",D," on ",Y,sep=""))
    }
    ##main<-paste("Gaussian kernel, bandwidth =",signif(bw$bw,3))

    ## mark zero
    p1 <- ggplot() + geom_hline(yintercept=0,colour="white",size=2)
    
    p1 <-  p1 + geom_line(data=est,aes(X,Coef))+
        geom_ribbon(data=est, aes(x=X,ymin=CI_lower,ymax=CI_upper),alpha=0.2)+
        xlab(x.label) + ylab(y.label) + theme(axis.title = element_text(size=15))

    
    ## histogram
    yrange<-na.omit(c(CI))
    if (is.null(ylim)==FALSE) {yrange<-c(ylim[2],ylim[1]+(ylim[2]-ylim[1])*1/6)}
    maxdiff<-(max(yrange)-min(yrange))

    
    ## plotting moderator distribution
    if (is.null(Xdistr) == TRUE) {
        Xdistr <- "density"
    } else if (Xdistr != "density" & Xdistr != "histogram" & Xdistr != "hist") {
        Xdistr <- "density"
    }  
    if (Xdistr == "density") { # density plot

        if (length(unique(data[,D]))==2) { ## binary D

            ## get densities
            if (is.null(weights)==TRUE) {
                de.co <- density(data[data[,D]==0,X])
                de.tr <- density(data[data[,D]==1,X])
            } else {
                suppressWarnings(library(plotrix))
                suppressWarnings(de.co<-density(data[data[,D]==0,X],
                                                weights=data[data[,D]==0,weights]))
                suppressWarnings(de.tr<-density(data[data[,D]==1,X],
                                                weights=data[data[,D]==1,weights]))
            }
            
            ## put in data frames
            deX.ymin <- min(yrange)-maxdiff/5
            deX.co <- data.frame(
                x = de.co$x,
                y = de.co$y/max(de.co$y) * maxdiff/5 + min(yrange) - maxdiff/5
            )
            deX.tr <- data.frame(
                x = de.tr$x,
                y = de.tr$y/max(de.tr$y) * maxdiff/5 + min(yrange) - maxdiff/5
            )

            ## color
            feed.col<-col2rgb("gray50")
            col.co<-rgb(feed.col[1]/1000, feed.col[2]/1000,feed.col[3]/1000)
            col.tr<-rgb(red=1, blue=0, green=0)

            ## plotting
            p1 <- p1 + geom_ribbon(data = deX.co,
                                   aes(x = x, ymax = y, ymin = deX.ymin),
                                   fill = col.co, alpha = 0.2) +
                geom_ribbon(data = deX.tr,
                            aes(x = x, ymax = y, ymin = deX.ymin),
                            fill = col.tr, alpha = 0.2)
            
            
            
        } else { ## continuous D
            
            ## get densities
            if (is.null(weights)==TRUE) {
                de <- density(data[,X])
            } else {
                suppressWarnings(library(plotrix))
                suppressWarnings(de <- density(data[,X],weights=data[,weights]))
            }

            ## put in data frames
            deX.ymin <- min(yrange)-maxdiff/5
            deX <- data.frame(
                x = de$x,
                y = de$y/max(de$y) * maxdiff/5 + min(yrange) - maxdiff/5
            ) 

            ## color
            feed.col<-col2rgb("gray50")
            col<-rgb(feed.col[1]/1000, feed.col[2]/1000,feed.col[3]/1000)
            
            ## plotting
            p1 <- p1 + geom_ribbon(data = deX,
                                   aes(x = x, ymax = y, ymin = deX.ymin),
                                   fill = col, alpha = 0.2)  
            
        } 
        
    } else  { # histogram plot

        if (length(unique(data[,D]))==2) { ## binary D

            if (is.null(weights)==TRUE) {
                hist.out<-hist(data[,X],breaks=80,plot=FALSE)
            } else {
                suppressWarnings(library(plotrix))
                suppressWarnings(hist.out<-hist(data[,X],data[,weights],
                                                breaks=80,plot=FALSE))
            } 
            n.hist<-length(hist.out$mids)
            dist<-hist.out$mids[2]-hist.out$mids[1]
            hist.max<-max(hist.out$counts)
            ## count the number of treated
            count1<-rep(0,n.hist)
            treat<-which(data[,D]==max(data[,D]))
            for (i in 1:n.hist) {
                count1[i]<-sum(data[treat,X]>=hist.out$breaks[i] &
                               data[treat,X]<hist.out$breaks[(i+1)])
            }
            count1[n.hist]<-sum(data[treat,X]>=hist.out$breaks[n.hist] &
                                data[treat,X]<=hist.out$breaks[n.hist+1])
            ## put in a data frame
            histX<-data.frame(ymin=rep(min(yrange)-maxdiff/5,n.hist),
                              ymax=hist.out$counts/hist.max*maxdiff/5+min(yrange)-maxdiff/5,
                              xmin=hist.out$mids-dist/2,
                              xmax=hist.out$mids+dist/2,
                              count1=count1/hist.max*maxdiff/5+min(yrange)-maxdiff/5)
            p1 <- p1 + geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax),
                                 colour="gray50",alpha=0,size=0.5) + # control
                geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=count1),
                          fill="red",colour="grey50",alpha=0.3,size=0.5) # treated
            
        }  else { ## continuous D
            hist.out<-hist(data[,X],breaks=80,plot=FALSE)
            n.hist<-length(hist.out$mids)
            dist<-hist.out$mids[2]-hist.out$mids[1]
            hist.max<-max(hist.out$counts)
            
            histX<-data.frame(ymin=rep(min(yrange)-maxdiff/5,n.hist),
                              ymax=hist.out$counts/hist.max*maxdiff/5+min(yrange)-maxdiff/5,
                              xmin=hist.out$mids-dist/2,
                              xmax=hist.out$mids+dist/2)
            p1 <- p1 + geom_rect(data=histX,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax),
                                 colour="gray50",alpha=0,size=0.5)
        }
    }

    
    if (is.null(ylim)==FALSE) {
        p1<-p1+coord_cartesian(ylim = c(ylim[1]-(ylim[2]-ylim[1])*0.25/6,
                                        ylim[2]+(ylim[2]-ylim[1])*0.4/6))}
    if (is.null(file)==FALSE) {
        pdf(file)
        plot(p1)
        graphics.off() 
    } else {
        plot(p1)
    }
    
    output<-list(bw=bw$bw,kernel=bw$ckertype,est=est, graph=p1)
    return(output)
}

#############

plot.all<-function(data, graphpath, name, main, Y, D, X, Z=NULL, FE=NULL,
                   Ylabel=NULL, Dlabel=NULL, Xlabel=NULL, 
                   smooth=TRUE, cuts=NULL, cuts2=NULL,
                   vartype=NULL, cl=NULL, time=NULL, pairwise=TRUE, weights=NULL,
                   bandwidth=NULL,smooth.lim=NULL,smooth.cl=TRUE, nboot=500,
                   grid=50,span=NULL,interval=NULL, cores=4,
                   raw.pos=NULL, density=T) {
    
    library(ggplot2)
    begin.time<-Sys.time()
        
    if (is.null(vartype)==TRUE) {vartype<-"standard"}
    
    if (length(unique(data[,D]))==2) { # binary case
        
        cat("Step 1: scatterplot\n")
        pdf(paste(graphpath,name,"_raw.pdf",sep=""),height=7,width=5)
        inter.rawplot(Y=Y,D=D,X=X,data=data,
                      Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,span=span,
                      pos=raw.pos)
        graphics.off()
        
        cat("Step 2: binned estimates\n")
        pdf(paste(graphpath,name,"_est.pdf",sep=""))
        out.est1<-inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE,data=data,
                                Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                                nbins=3, cuts=cuts, var=vartype, cl=cl, time=time, pairwise=pairwise, weights=weights,
                                main=main,interval=interval,ylim=smooth.lim)
        graphics.off()
        
        ##regenerate without title
        pdf(paste(graphpath,name,"_est0.pdf",sep=""))
        inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE, data=data, Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                      nbins=3, cuts=cuts, var=vartype, cl=cl, time=time, pairwise=pairwise,
                      weights=weights,interval=interval, ylim=smooth.lim)
        graphics.off()
        
        out.est2<-inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE,
                                data=data, Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                                nbins=2, cuts=cuts2, var=vartype, cl=cl, time=time, pairwise=pairwise,
                                weights=weights, figure=FALSE)
        
        
        if (smooth==TRUE) {
            cat("Step 3: kernal smoothed estimates\n")
            if (vartype=="cluster") {cl2<-cl} else {cl2<-NULL}
            out.kernel<-inter.kernel(data=data, Y=Y,D=D,X=X,Z=Z,FE=FE,
                                     nboot=nboot,cl=cl2,cores=cores,
                                     Dlabel=Dlabel, Xlabel=Xlabel, Ylabel=Ylabel,
                                     h=bandwidth, ylim=smooth.lim,
                                     file=paste(graphpath,name,"_smooth.pdf",sep=""))
        }
        
    } else { # continuous case
        
        cat("Step 1: scatterplot\n")
        pdf(paste(graphpath,name,"_raw.pdf",sep=""),height=4,width=10)
        inter.rawplot(Y=Y,D=D,X=X,data=data,Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel, cuts=cuts,span=span)
        graphics.off()
        
        cat("Step 2: GAM plot\n")
        pdf(paste(graphpath,name,"_gam.pdf",sep=""))
        inter.gam(Y=Y,D=D,X=X,Z=NULL,FE=NULL,data=data,SE=0, weights=weights)
        graphics.off()
        
        cat("Step 3: binned estimates\n")
        pdf(paste(graphpath,name,"_est.pdf",sep=""))
        out.est1<-inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE,
                                data=data,Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                                nbins=3, cuts=cuts, var=vartype, cl=cl, time=time, pairwise=pairwise,
                                weights=weights,
                                main=main,interval=interval,ylim=smooth.lim)
        graphics.off()
        
        pdf(paste(graphpath,name,"_est0.pdf",sep=""))
        inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE, data=data, Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                      nbins=3,cuts=cuts, var=vartype,cl=cl,time=time, pairwise=pairwise,
                      weights=weights,interval=interval, ylim=smooth.lim)
        graphics.off()
        
        out.est2<-inter.binning(Y=Y,D=D,X=X,Z=Z,FE=FE,
                                data=data, Xlabel=Xlabel, Ylabel=Ylabel, Dlabel=Dlabel,
                                nbins=2, cuts=cuts2, var=vartype,cl=cl,time=time, pairwise=pairwise,
                                weights=weights, figure=FALSE)
        
        
        if (smooth==TRUE) {
            cat("Step 4: kernal smoothed estimates\n")
            if (vartype=="cluster" & smooth.cl==TRUE) {cl2<-cl} else {cl2<-NULL}
            out.kernel<-inter.kernel(Y=Y,D=D,X=X,Z=Z,FE=FE, data=data, nboot=nboot,grid=grid,cl=cl2, cores=cores,
                                     Ylabel=Ylabel,Dlabel=Dlabel,Xlabel=Xlabel, h=bandwidth, ylim=smooth.lim,
                                     file=paste(graphpath,name,"_smooth.pdf",sep=""))
        } 
    }
    
    cat("\n### Statistics ###\n")
    output<-list(binary.treatment=out.est1$binary.treatment,
                 treat.variation=out.est1$treat.variation.byGroup,
                 correct.order=out.est1$correctOrder,
                 three.bin.coefs=out.est1$coefs,
                 three.bin.pvalues=out.est1$p.twosided,
                 two.bin.pvalues=out.est2$p.twosided)
                 ## betas = out.est1$betas,
                 ## p.ftest = out.est1$p.ftest)
                                        #if (smooth==TRUE) output<-c(output, out.kernel)
    print(Sys.time()-begin.time)
    return(output)
    
}


#############
inter.test<-function(Y,D,X,Z=NULL,FE=NULL, data,
                     cl = NULL, vartype = NULL,
                     pairwise = TRUE,  time=NULL,
                     weights=NULL, nbins=3, cuts=NULL){
    
    ## Y: outcome
    ## D: "treatment" indicator
    ## X: covariate to be interacted with D
    ## Z: control variables
    ## weights: weighting variable
    ## nbins: # of X bins
    ## var: standard (default); "robust"; "cluster", "pcse"
    ## cl: variable to be clustered on
    ## time: time variable for pcse
    ## pairwaise: pcse option

    
    N<-dim(data)[1]
    data[,D]<-as.numeric(data[,D])
    
    ## parsing fixed effects
    if (is.null(FE)==FALSE) {
        if (is.null(Z)==TRUE) {Z<-c()}
        for (i in 1:length(FE)) {
            Z<-c(Z,paste("as.factor(",FE[i],")",sep=""))
        } 
    }
    
    ## formula
    formula0 <- paste(Y,"~",D,"+",X,"+",D,"*",X)
     
    
    ## ############ Binning #########################
    
    ## grouping by X
    if (is.null(cuts)==TRUE) {
        cuts.X<-quantile(data[,X],probs=seq(0,1,1/nbins))
    } else {
        cuts.X<-cuts
        nbins<-length(cuts)-1
    }
    while (length(unique(cuts.X))!=nbins+1) {
        nbins<-nbins-1; cuts.X<-quantile(data[,X],probs=seq(0,1,1/nbins));
    }      
    groupX<-cut(data[,X],breaks=cuts.X,label=F)
    groupX[which(data[,X]==min(data[,X]))]<-1

    ## create dummies for bins and interactions
    ## G -- a matrix of group dummies
    ## DG -- a matrix of interactions
    
    ## mid points
    x0<-rep(NA,nbins)
    for (i in 1:nbins) x0[i]<-median(data[which(groupX==i),X], na.rm=TRUE)
     
    
    G<-DG<-GX<-DGX<-matrix(0,N,(nbins-1))
    for (i in 1:(nbins-1)) {
        G[which(groupX==(i+1)),i]<-1
        DG[,i]<-data[,D]*G[,i]
        GX[,i]<-data[,X]*G[,i]
        DGX[,i]<-data[,D]*data[,X]*G[,i]
    }
    
    ## formula and esitmation
    Gs<-GXs<-DGs<-DGXs<-c()
    for (i in 2:nbins)  {
        Gs<-c(Gs,paste("G",i,sep=""))
        GXs<-c(GXs,paste("GX",i,sep=""))
        DGs<-c(DGs,paste("DG",i,sep=""))
        DGXs<-c(DGXs,paste("DGX",i,sep=""))
    }
    colnames(G) <- Gs
    colnames(DG) <- DGs
    colnames(GX) <- GXs
    colnames(DGX) <- DGXs
    
    data.aug <- cbind.data.frame(data, G, DG, GX, DGX)
    formula1<-paste(formula0,
              "+",paste(Gs,collapse=" + "),
              "+",paste(GXs,collapse=" + "),
              "+",paste(DGs,collapse=" + "),
              "+",paste(DGXs,collapse=" + "))

    if (is.null(Z)==FALSE) {
        formula0 <- paste(formula0, "+",paste(Z,collapse=" + "))
        formula1 <- paste(formula1, "+",paste(Z,collapse=" + "))
    } 
    
    ##### fit the two models
 
    if (is.null(weights)==TRUE) {
        mod.re<-lm(as.formula(formula0),data=data.aug) 
        mod.un<-lm(as.formula(formula1), data=data.aug) 
    } else {
        mod.re<-lm(as.formula(formula0), data=data.aug, weights=data.aug[,weights])
        mod.un<-lm(as.formula(formula1), data=data.aug, weights=data.aug[,weights])
    }

    ## vcov
    if (is.null(vartype)==TRUE) vartype <- "standard"
    if (vartype=="standard") {
        v<-vcov(mod.un)
    } else if (vartype=="robust") {
        require(sandwich)
        v<-vcov(mod.un,type="HC1") # White with small sample correction
    } else if (vartype=="cluster") {
        if (is.null(cl)==FALSE) {
            v<-vcovCluster(mod.un,cluster = data.aug[,cl])
        } else {  # variable to be clustered not supply
            stop("Warning: clustering variable not found, set cl=varname")
        }
    } else if (vartype=="pcse") {
        library(pcse)
        if (is.null(cl)==FALSE & is.null(time)==FALSE) {
            v<-pcse(mod.un,
                    groupN=data.aug[,cl],
                    groupT=data.aug[,time],
                    pairwise=pairwise)$vcov
        } else {  # variable to be clustered not supply
            stop("Warning: please supply unit and time indicators, set cl=varname1, time=varname2")
        }
    } 
    ## rss.un <- sum(mod.un$residuals^2)
    ## rss.re <- sum(mod.re$residuals^2)
    ## df.un <- mod.un$df.residual
    ## f.stat <- ((rss.re-rss.un)/8)/(rss.un/df.un)
    ## p.ftest <- 1 - pchisq(f.stat, df=c(8,df.un))
    ## Ftest <- c("f.stat"=f.stat,"p.value"=p.ftest)

    library(lmtest)
    wald <- waldtest(mod.re, mod.un, test="Chisq", vcov=v)
    p.wald <- wald[[4]][2]
    print(p.wald)
     
    out <- list(wald = wald)

    return(out)
    
    
}

#############

## vcovCluster.r 
## function to compute var-cov matrix using clustered robust standard errors
## inputs:
## model = model object from call to lm or glm
## cluster = vector with cluster ID indicators
## output:
## cluster robust var-cov matrix
## to call this for a model directly use:
## coeftest(model,vcov = vcovCluster(model, cluster))
## formula is similar to Stata's , cluster command

vcovCluster <- function(
    model,
    cluster
)
{
    require(sandwich)
    ## require(lmtest)
    
    if(nrow(model.matrix(model))!=length(cluster)){
        stop("check your data: cluster variable has different N than model")
    }
    M <- length(unique(cluster))
    N <- length(cluster)           
    K <- model$rank   
    if(M<50){
        warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
    }
    dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
    uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
    rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
    ##colnames(rcse.cov)<-rownames(rcse.cov)<-names(model$coefficients)
    return(rcse.cov)
}

################# Statistics of the moderator

Xstat<-function(x,interval=NULL,prob=0.8) {

    library(Lmoments)
    library(moments)
    library(coda)

    x0<-x
    if (is.null(interval)==FALSE) {
        x<-x[which(x>=interval[1] & x<=interval[2])]
    }
    
    ## moments
    skew<-skewness(x)
    kurt<-kurtosis(x)

    ## L moments
    out.lm<-Lmoments(x,returnobject=TRUE)
    tau3<-out.lm$ratios[3]
    tau4<-out.lm$ratios[4]

    ## 80% coverage
    out.hpd <- HPDinterval(as.mcmc(x0), prob=prob)
    ratio <- (out.hpd[2]-out.hpd[1])/(max(x)-min(x))

    out<-round(c(n=length(x), skew=skew, kurt=kurt,
                 tau3=tau3, tau4=tau4, ratio=ratio),3)
    return(out)
}



################# replicate

replicate<-function(data,Y, D, X, Z=NULL, FE=NULL, weight=NULL){
    f1<-paste(Y,"~",D)
    inter<-paste(X,D,sep="*")


    ## parsing fixed effects
    if (is.null(FE)==FALSE) {
        if (is.null(Z)==TRUE) {Z<-c()}
        for (i in 1:length(FE)) {
            Z<-c(Z,paste("as.factor(",FE[i],")",sep=""))
        } 
    }
    
    if(length(Z)==1){controls<-Z}
    if(length(Z)>1){
        controls<-Z[1]
        for(i in 2:length(Z)){ controls<-paste(controls, Z[i],sep="+")}
    }
    
    form<-paste(f1, inter, controls, sep="+" )
    
    
    if(is.null(weight)==TRUE){
        m<-summary(lm(form, data=data))
    } else {
        data.aug<-data
        data.aug$weights<-data[,weight]
        m<-summary(lm(form, data=data.aug, weights=weights))
    }
    
    return(m)
    
}


